package com.company.MovieAnalyse.analyse2;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.regex.Pattern;

/**
 * Mapper that emits {@code (character, 1)} for every character in the range
 * U+4E00..U+9FA5 found in the seventh tab-separated field of an input line.
 *
 * <p>Each input value is split on the tab character; the field at index
 * {@link #TEXT_COLUMN} is then broken into individual characters and every
 * character matching {@link #CJK_CHAR} is written to the context with a count
 * of one. Lines that do not contain enough fields are skipped and tallied in
 * the {@code WordCountMapper / MALFORMED_RECORDS} job counter instead of
 * failing the task with an {@link ArrayIndexOutOfBoundsException}.
 */
public class WordCountMapper extends Mapper<LongWritable,Text,Text,LongWritable> {
    /** Zero-based index of the tab-separated column whose text is scanned. */
    private static final int TEXT_COLUMN = 6;

    /**
     * Matches a string consisting of exactly one character in U+4E00..U+9FA5.
     * Compiled once here rather than once per character inside the map loop.
     */
    private static final Pattern CJK_CHAR = Pattern.compile("[\\u4e00-\\u9fa5]");

    // Output objects are reused across writes (the same pattern as Hadoop's
    // WordCount example) to avoid allocating two objects per emitted pair.
    private final Text outKey = new Text();
    private final LongWritable one = new LongWritable(1);

    /**
     * Processes one input line.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   one line of tab-separated text
     * @param context sink for the emitted {@code (character, 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");

        // String.split drops trailing empty fields, so a short array can mean
        // either a malformed line or an empty target column; both have nothing
        // to emit.
        if (fields.length <= TEXT_COLUMN) {
            context.getCounter("WordCountMapper", "MALFORMED_RECORDS").increment(1);
            return;
        }

        // split("") yields one string per UTF-16 code unit; the matched range
        // lies entirely in the BMP, so each matched token is a full character.
        for (String ch : fields[TEXT_COLUMN].split("")) {
            if (CJK_CHAR.matcher(ch).matches()) {
                outKey.set(ch);
                context.write(outKey, one);
            }
        }
    }
}
